

% Estimates linear conditional logit model.
% p( Y = j | X = x_t) = exp(x_jt'theta)/sum_k^J( exp(x_jt'theta) )
% Inputs:   y (nobs x 1) - choice variable, J choices
%           X (nobs x (J x theta)) 


function [b] = clogit(y,X)

% Treat NaN as missing values:

D = sum(isnan([y X]),2);
D = (D==0);
D = logical(D);
y = y(D);
X = X(D,:);


n = size(y,1);

% Check for colinearities:
% [b]=regress(y,X);
% dropped = logical(b(:)==0);
% keepX   = logical(1-dropped);
% X = X(:,keepX);
% 
% k = size(X,2);
% 
% [InitialBeta] = ols(y,X);

% Number of choices:
JSet = sort(unique(y));
J  = length(JSet);

% Choice index:
yidx = zeros(n,1); 
for j=1:J
    yidx(y==JSet(j)) = j;
end

% Initial beta:
b0   = ones(size(X,2)/J,1);
npar = length(b0);

options =  optimoptions('fminunc',                           ...
                        'Algorithm',        'quasi-newton',  ...
                        'DerivativeCheck',  'off',           ...
                        'GradObj',          'on',            ...
                        'Display',          'iter-detailed');

b = fminunc(@Lik,b0,options);

function [LL,Grad] = Lik(beta)
        
      phat = nan(n,J);  
      for k=1:J
          phat(:,k) = X(:,npar*(k-1)+1:npar*k)*beta;
      end
      phat = exp(phat)./kron( sum(exp(phat),2) , ones(1,J));
      
      phatn   = nan(n,1);
      g.sumexp  = zeros(n,1);
      g.sumexpX = zeros(n,npar);
      g.X       = zeros(n,npar);
      
      for k=1:J
          Xk  = X(:,npar*(k-1)+1:npar*k);
          XkB = X(:,npar*(k-1)+1:npar*k)*beta;
          % Vector with chosen choice probabilities: 
          phatn(yidx == k) = phat(yidx == k,k);
          g.X(yidx == k,:) = X(yidx == k,npar*(k-1)+1:npar*k);
          g.expXkB = exp(XkB);
          g.sumexp  = g.sumexp  + g.expXkB;
          g.sumexpX = g.sumexpX + kron(g.expXkB,ones(1,npar)).*Xk;
          
      end
      
      % Loglikelihood:
      LL = - log( phatn );
      LL = nansum(LL);
      
      % Gradient:
      Grad = - g.X + g.sumexpX./kron(g.sumexp,ones(1,npar));
      Grad = sum(Grad,1);
      
%     Yes = (y==1);
%     No  = (y==0);
%     NormCDF_yes = normcdf(X(Yes,:)*beta);
%     NormCDF_no  = 1-normcdf(X(No,:)*beta);
%     P_yes = log(NormCDF_yes);
%     P_no  = log(NormCDF_no);
%     
%     LL = sum(P_yes) + sum(P_no);
%     LL = - LL;
%     
%     % Gradient:
%     Grad = zeros(n,k);
%     
%     Grad(Yes,:) = kron(normpdf(X(Yes,:)*beta)./NormCDF_yes,ones(1,k)).*X(Yes,:);
%     Grad(No,:)  = kron(-normpdf(X(No,:)*beta)./NormCDF_no,ones(1,k)).*X(No,:);
%     
%     Grad        = - sum(Grad,1)';
    
end


% Delta = zeros(n,k);
% 
% Delta(y==1,:)  = kron(normpdf(X(y==1,:)*b)./normcdf(X(y==1,:)*b),ones(1,k)).*X(y==1,:);
% Delta(y==0,:)  = kron(-normpdf(X(y==0,:)*b)./(1-normcdf(X(y==0,:)*b)),ones(1,k)).*X(y==0,:);
% 
% I  = (1/n)*(Delta'*Delta);
% Vb = inv(I); 
% 
% sb = sqrt(diag(Vb)/n);
% 
% % Display results
% 
% if nargin>2
%     disp([varagin' num2cell(b) num2cell(sb) num2cell(b./sb)])
% end
% disp('---')
% fprintf(1,'%4s','N'); fprintf(1,'%12.0f',size(y,1)); fprintf(1,'\n');


end
